[HVM] Save/restore: make sure all ioreqs are gone when a domain is saved
authorTim Deegan <Tim.Deegan@xensource.com>
Thu, 8 Mar 2007 16:38:15 +0000 (16:38 +0000)
committerTim Deegan <Tim.Deegan@xensource.com>
Thu, 8 Mar 2007 16:38:15 +0000 (16:38 +0000)
by pausing the domain and pulling them through.
Signed-off-by: Tim Deegan <Tim.Deegan@xensource.com>
tools/ioemu/hw/ide.c
tools/ioemu/target-i386-dm/helper2.c
tools/ioemu/vl.h
tools/libxc/xc_hvm_restore.c
tools/libxc/xc_hvm_save.c
tools/libxc/xenguest.h
tools/libxc/xg_private.c
xen/arch/x86/hvm/hvm.c
xen/include/public/hvm/hvm_op.h

index c4db9fe738567315450f733169ae05b6d875fc6d..274dd8f8503c43757b3e72f280d373934077478e 100644 (file)
@@ -396,17 +396,41 @@ typedef struct PCIIDEState {
 
 #ifdef DMA_MULTI_THREAD
 
+static pthread_t ide_dma_thread;
 static int file_pipes[2];
 
 static void ide_dma_loop(BMDMAState *bm);
 static void dma_thread_loop(BMDMAState *bm);
 
+extern int suspend_requested;
 static void *dma_thread_func(void* opaque)
 {
     BMDMAState* req;
-
-    while (read(file_pipes[0], &req, sizeof(req))) {
-        dma_thread_loop(req);
+    fd_set fds;
+    int rv, nfds = file_pipes[0] + 1;
+    struct timeval tm;
+
+    while (1) {
+
+        /* Wait at most a second for the pipe to become readable */
+        FD_ZERO(&fds);
+        FD_SET(file_pipes[0], &fds);
+        tm.tv_sec = 1;
+        tm.tv_usec = 0;
+        rv = select(nfds, &fds, NULL, NULL, &tm);
+        
+        if (rv != 0) {
+            if (read(file_pipes[0], &req, sizeof(req)) == 0)
+                return NULL;
+            dma_thread_loop(req);
+        } else {
+            if (suspend_requested)  {
+                /* Need to tidy up the DMA thread so that we don't end up 
+                 * finishing operations after the domain's ioreqs are 
+                 * drained and its state saved */
+                return NULL;
+            }
+        }
     }
 
     return NULL;
@@ -414,24 +438,41 @@ static void *dma_thread_func(void* opaque)
 
 static void dma_create_thread(void)
 {
-    pthread_t tid;
     int rt;
+    pthread_attr_t a;
 
     if (pipe(file_pipes) != 0) {
         fprintf(stderr, "create pipe failed\n");
         exit(1);
     }
 
-    if ((rt = pthread_create(&tid, NULL, dma_thread_func, NULL))) {
+    if ((rt = pthread_attr_init(&a))
+        || (rt = pthread_attr_setdetachstate(&a, PTHREAD_CREATE_JOINABLE))) {
+        fprintf(stderr, "Oops, dma thread attr setup failed, errno=%d\n", rt);
+        exit(1);
+    }    
+    
+    if ((rt = pthread_create(&ide_dma_thread, &a, dma_thread_func, NULL))) {
         fprintf(stderr, "Oops, dma thread creation failed, errno=%d\n", rt);
         exit(1);
     }
+}
 
-    if ((rt = pthread_detach(tid))) {
-        fprintf(stderr, "Oops, dma thread detachment failed, errno=%d\n", rt);
-        exit(1);
+void ide_stop_dma_thread(void)
+{
+    int rc;
+    /* Make sure the IDE DMA thread is stopped */
+    if ( (rc = pthread_join(ide_dma_thread, NULL)) != 0 )
+    {
+        fprintf(stderr, "Oops, error collecting IDE DMA thread (%s)\n", 
+                strerror(rc));
     }
 }
+
+#else
+void ide_stop_dma_thread(void)
+{
+}
 #endif /* DMA_MULTI_THREAD */
 
 #if defined(__ia64__)
index dc3394d5b783f2e5fc97f2587b1f993ac41754a1..7a5f3412c9dd75c021b4351f8d622130e22ff452 100644 (file)
@@ -577,7 +577,28 @@ int main_loop(void)
         destroy_hvm_domain();
     else {
         char qemu_file[20];
+        ioreq_t *req;
+        int rc;
+
         sprintf(qemu_file, "/tmp/xen.qemu-dm.%d", domid);
+        xc_domain_pause(xc_handle, domid);
+
+        /* Pull all outstanding ioreqs through the system */
+        handle_buffered_io(env);
+        main_loop_wait(1); /* For the select() on events */
+        
+        /* Stop the IDE thread */
+        ide_stop_dma_thread();
+
+        /* Make sure that all outstanding IO responses are handled too */ 
+        if ( xc_hvm_drain_io(xc_handle, domid) != 0 )
+        {
+            fprintf(stderr, "error clearing ioreq rings (%s)\n", 
+                    strerror(errno));
+            return -1;
+        }
+
+        /* Save the device state */
         if (qemu_savevm(qemu_file) < 0)
             fprintf(stderr, "qemu save fail.\n");
     }
index 8535ba689ca24884ee624163e6a7d53b4b7a601d..a6b53aa3125744e8febce3993783a3c89e89449e 100644 (file)
@@ -843,6 +843,7 @@ void pci_cmd646_ide_init(PCIBus *bus, BlockDriverState **hd_table,
 void pci_piix3_ide_init(PCIBus *bus, BlockDriverState **hd_table, int devfn);
 int pmac_ide_init (BlockDriverState **hd_table,
                    SetIRQFunc *set_irq, void *irq_opaque, int irq);
+void ide_stop_dma_thread(void);
 
 /* cdrom.c */
 int cdrom_read_toc(int nb_sectors, uint8_t *buf, int msf, int start_track);
index 001fe2a6e4d6a64e6c97ea82c4d1ece5c32e062d..a5e51e84e870158bb11239c61af04ef413c122c5 100644 (file)
@@ -281,6 +281,14 @@ int xc_hvm_restore(int xc_handle, int io_fd,
     else
         shared_page_nr = (v_end >> PAGE_SHIFT) - 1;
 
+    /* Paranoia: clean pages. */
+    if ( xc_clear_domain_page(xc_handle, dom, shared_page_nr) ||
+         xc_clear_domain_page(xc_handle, dom, shared_page_nr-1) ||
+         xc_clear_domain_page(xc_handle, dom, shared_page_nr-2) ) {
+        ERROR("error clearing comms frames!\n");
+        goto out;
+    }
+
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, shared_page_nr-1);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN, shared_page_nr-2);
     xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN, shared_page_nr);
index d1b44bd1c66233ce1868b719e781114663e15cce..c422e296e6d5e00c3535dae05ff3106ec0e4893f 100644 (file)
 #include <stdlib.h>
 #include <unistd.h>
 #include <sys/time.h>
-#include <xen/hvm/e820.h>
 
 #include "xc_private.h"
 #include "xg_private.h"
 #include "xg_save_restore.h"
 
+#include <xen/hvm/e820.h>
+#include <xen/hvm/params.h>
+
 /*
 ** Default values for important tuning parameters. Can override by passing
 ** non-zero replacement values to xc_hvm_save().
@@ -49,12 +51,29 @@ static unsigned long max_mfn;
 /* virtual starting address of the hypervisor */
 static unsigned long hvirt_start;
 
-/* #levels of page tables used by the currrent guest */
+/* #levels of page tables used by the current guest */
 static unsigned int pt_levels;
 
 /* total number of pages used by the current guest */
 static unsigned long max_pfn;
 
+int xc_hvm_drain_io(int handle, domid_t dom)
+{
+    DECLARE_HYPERCALL;
+    xen_hvm_drain_io_t arg;
+    int rc;
+
+    hypercall.op     = __HYPERVISOR_hvm_op;
+    hypercall.arg[0] = HVMOP_drain_io;
+    hypercall.arg[1] = (unsigned long)&arg;
+    arg.domid = dom;
+    if ( lock_pages(&arg, sizeof(arg)) != 0 )
+        return -1;
+    rc = do_xen_hypercall(handle, &hypercall);
+    unlock_pages(&arg, sizeof(arg));
+    return rc;
+}
+
 /*
 ** During (live) save/migrate, we maintain a number of bitmaps to track
 ** which pages we have to send, to fixup, and to skip.
index cb3c6a4d1c788dca4e26cb4eedb0bdb45375f10f..0f77217978f8a2e00e5fdc8b4971a6915bb30335 100644 (file)
@@ -155,6 +155,8 @@ int xc_set_hvm_param(
 int xc_get_hvm_param(
     int handle, domid_t dom, int param, unsigned long *value);
 
+int xc_hvm_drain_io(int handle, domid_t dom);
+
 /* PowerPC specific. */
 int xc_prose_build(int xc_handle,
                    uint32_t domid,
index 63ffea4aad015c25ee8fac33ffd90ba99b4747af..2332ef4c6ccb68aed3432546ee93561ae21d0728 100644 (file)
@@ -229,6 +229,11 @@ __attribute__((weak)) int xc_set_hvm_param(
     return -ENOSYS;
 }
 
+__attribute__((weak)) int xc_hvm_drain_io(int handle, domid_t dom)
+{
+    return -ENOSYS;
+}
+
 /*
  * Local variables:
  * mode: C
index d03824bc0e24b81a33d798b4f12d577c298e11c6..09515203dea2b5bae341748eab6cc4176b4e6dab 100644 (file)
@@ -146,6 +146,48 @@ void hvm_do_resume(struct vcpu *v)
     }
 }
 
+/* Called from the tools when saving a domain to make sure the io
+ * request-response ring is entirely empty. */
+static int hvmop_drain_io(
+    XEN_GUEST_HANDLE(xen_hvm_drain_io_t) uop)
+{
+    struct xen_hvm_drain_io op;
+    struct domain *d;
+    struct vcpu *v;
+    ioreq_t *p;
+    int rc;
+
+    if ( copy_from_guest(&op, uop, 1) )
+        return -EFAULT;
+
+    if ( !IS_PRIV(current->domain) )
+        return -EPERM;
+
+    d = rcu_lock_domain_by_id(op.domid);
+    if ( d == NULL )
+        return -ESRCH;
+
+    rc = -EINVAL;
+    /* Can't do this to yourself, or to a domain without an ioreq ring */
+    if ( d == current->domain || !is_hvm_domain(d) || get_sp(d) == NULL )
+        goto out;
+
+    rc = 0;
+
+    domain_pause(d);  /* It's not safe to do this to running vcpus */
+    for_each_vcpu(d, v)
+    {
+        p = &get_vio(v->domain, v->vcpu_id)->vp_ioreq;
+        if ( p->state == STATE_IORESP_READY )
+            hvm_io_assist(v);
+    }
+    domain_unpause(d);
+
+ out:
+    rcu_unlock_domain(d);
+    return rc;
+}
+
 int hvm_domain_initialise(struct domain *d)
 {
     int rc;
@@ -916,6 +958,12 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg)
             guest_handle_cast(arg, xen_hvm_set_pci_link_route_t));
         break;
 
+    case HVMOP_drain_io:
+        rc = hvmop_drain_io(
+            guest_handle_cast(arg, xen_hvm_drain_io_t));
+        break;
+
     default:
     {
         gdprintk(XENLOG_WARNING, "Bad HVM op %ld.\n", op);
index f5680509490f03d74c3ca83ef00b3fc6a72d0109..f03ad288bb5d28eaeb8c8bd828bf8884519f9a96 100644 (file)
@@ -70,4 +70,12 @@ struct xen_hvm_set_pci_link_route {
 typedef struct xen_hvm_set_pci_link_route xen_hvm_set_pci_link_route_t;
 DEFINE_XEN_GUEST_HANDLE(xen_hvm_set_pci_link_route_t);
 
+/* Drain all outstanding qemu-dm IO responses from a domain's ioreq ring. */
+#define HVMOP_drain_io            5
+struct xen_hvm_drain_io {
+    domid_t  domid;
+};
+typedef struct xen_hvm_drain_io xen_hvm_drain_io_t;
+DEFINE_XEN_GUEST_HANDLE(xen_hvm_drain_io_t);
+
 #endif /* __XEN_PUBLIC_HVM_HVM_OP_H__ */